| title: “A Mixture of Flexible Multivariate Distributions” |
| output: html_document |
In this document, I will compute the posterior distribution for \(\beta_{j}\) using the derivation in the document "next steps", where the prior on \(\beta_{j}\) is modeled as a mixture of multivariate normals. In this portion, I train on 10000 gene-SNP pairs and then test on 40000.
## Setup: load standardized effect sizes, standard errors, and t statistics for
## all gene-SNP pairs across the R = 43 tissues.
## NOTE(review): setwd() with a hard-coded absolute path makes this script
## non-portable — confirm before reuse elsewhere.
setwd("~/Dropbox/cyclingstatistician/beta_gp_continuous/")
tuto.dir=getwd()
##' @param b.gp.hat PxR matrix of standardized effect sizes across all R tissue types
##' @param se.gp.hat matrix of standardized standard errors, corresponding to the MLE of genotype effect for a given gene-SNP pair in all tissues
##' @param t.stat PxR matrix of t statistics for each gene-SNP pair across all R tissues
##' @param U.0kl RxR prior covariance matrix for prior covariance shape k and weight l
##' @param pi LxK matrix of prior weights estimated from the EM algorithm which correspond to optimal weighting of prior covariance matrices
## Real-data inputs: the first two columns (identifiers) are dropped and rows
## with missing values removed. The commented alternatives load a simulated set.
b.gp.hat=na.omit(read.table("16008genesnppairs_43tissues_beta.hat.std.txt",header=F,skip=1)[,-c(1,2)])
#b.gp.hat=na.omit(read.table("50186simulatedbeta.hat.std.txt",header=F,skip=1)[,-c(1,2)])[1:10000,]
se.gp.hat=na.omit(read.table("16008genesnppairs_43tissues_sigma.hat.std.txt",header=F,skip=1)[,-c(1,2)])
#se.gp.hat=na.omit(read.table("50186simulatedse.hat.std.txt",header=F,skip=1)[,-c(1,2)])[1:10000,]
t.stat=na.omit(read.table("16008genesnppairs_43tissues_t.stat.txt",header=F,skip=1)[,-c(1,2)])
#t.stat=na.omit(read.table("50186simulatedt.hat.std.txt",header=F,skip=1)[,-c(1,2)])[1:10000,]
#p.vals=na.omit(read.table("16008genesnppairs_43tissues_pval.txt",header=F,skip=1)[,-c(1,2)])
## J = number of gene-SNP pairs retained after na.omit
(J=nrow(b.gp.hat))
## [1] 6904
L = 5 ## number of omegas to use
R=ncol(b.gp.hat)#number of tissues
X.t=as.matrix(t.stat)
X.c=apply(X.t,2,function(x) x-mean(x)) ##Column centered matrix of t statistics
R=ncol(X.t)
M=nrow(X.t)
#colMeans(X.c)
Now, we need to load in the prior matrices for which we will try to find the optimal combination. First we load in the K RxR prior covariance matrices specifying the relative importance of a particular tissue direction in each component. Now, for every prior covariance matrix $U_{k}$, we compute L 'stretches' specifying the width of this distribution. Thus for each stretch \(\omega\), there will be K corresponding covariance matrices.
##' Build the L x K list of prior covariance matrices, one K-list per stretch.
##' @param P number of PCs to keep in the rank-P approximation
##' @param L number of grid weights (stretches)
##' @param R number of tissues
##' @param omega.table dataframe of stretches, one row per omega
##' @return an L-dimensional list of K = 3 component lists; element [[l]][[k]] is
##'   the l-th stretch times the k-th covariance shape (identity, empirical
##'   correlation of the t statistics, rank-P SVD approximation).
##' @note relies on the globals X.c (column-centered t statistics) and M = nrow(X.c).
omega.table=read.table("~/Dropbox/cyclingstatistician/beta_gp_continuous/omega2.txt")
get.prior.covar.Ukl=function(P,L,R,omega.table){
  ## Hoist the loop-invariant work: the empirical correlation matrix and the
  ## rank-P SVD reconstruction do not depend on omega, so compute them once
  ## (the original recomputed the SVD on every loop iteration).
  S=cov2cor((t(X.c)%*%X.c)/M)
  svd.X=svd(X.c)## SVD of the column-centered t-statistic matrix
  v=svd.X$v;u=svd.X$u;d=svd.X$d
  ## NOTE(review): diag(d[1:P]) builds an identity matrix, not a 1x1 matrix,
  ## when P == 1 (base-R diag() quirk, present in the original too) — confirm
  ## P > 1 is always used.
  X.p=v[,1:P]%*%diag(d[1:P])%*%t(u[,1:P])## rank-P summary representation
  cov.pc=1/M*X.p%*%t(X.p)
  S.pc=cov2cor(cov.pc)
  test=list()
  for(l in 1:L){
    omega=omega.table[l,]
    test[[l]]=list()
    test[[l]][[1]]=omega*diag(1,R)# first component: the 'sopped up' identity
    test[[l]][[2]]=omega*S# second: empirical correlation of t statistics
    test[[l]][[3]]=omega*S.pc# third: rank-P SVD approximation
  }
  return(test)
}
Now that we have the prior covariance matrices, we can maximize the likelihood \(L(\pi;\hat\beta_{j},se_{j})\) by computing \(Pr(\hat \beta_{j} | \text{Component } k,l)\) for each component at each gene-SNP pair and then finding the optimal combination among all gene-SNP pairs.
##' @return likelihood matrix with gene-SNP pairs in the rows and components in the columns
##' @param b.gp.hat and se.gp.hat matrices of MLEs for all J gene-SNP pairs
##'
##' @param U.0kl L-dimensional list of K-dimensional lists with a prior covariance matrix for each (grid weight, covariance shape) pair
#install.packages("SQUAREM") note you'll need to have SQUAREM installed
library("SQUAREM")
## Build the prior covariance list: L = 5 stretches, R = 43 tissues, P = 5 PCs.
L=5
#R=5
R=43
U.0kl=get.prior.covar.Ukl(P=5,L,R,omega.table=omega.table)
## K = 3 covariance shapes per stretch; L = 5 stretches.
(K=length(U.0kl[[1]]))
## [1] 3
(L=length(U.0kl))
## [1] 5
It might also be useful to generate one set of lists (i.e., a KxL component 'mega' list) which concatenates all the components into a large object.
##' generate a KxL list where the first K elements correspond to omega1 times the first K matrices, the second K elements correspond to omega2 times the second K, etc.
## Flatten the nested L-of-K list into one list of K*L covariance matrices.
covmat=unlist(U.0kl,recursive=FALSE)
head(covmat[[1]])
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [2,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [3,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [4,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [5,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [6,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## [1,] 0 0 0 0 0 0 0 0 0 0 0
## [2,] 0 0 0 0 0 0 0 0 0 0 0
## [3,] 0 0 0 0 0 0 0 0 0 0 0
## [4,] 0 0 0 0 0 0 0 0 0 0 0
## [5,] 0 0 0 0 0 0 0 0 0 0 0
## [6,] 0 0 0 0 0 0 0 0 0 0 0
## [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35]
## [1,] 0 0 0 0 0 0 0 0 0 0 0
## [2,] 0 0 0 0 0 0 0 0 0 0 0
## [3,] 0 0 0 0 0 0 0 0 0 0 0
## [4,] 0 0 0 0 0 0 0 0 0 0 0
## [5,] 0 0 0 0 0 0 0 0 0 0 0
## [6,] 0 0 0 0 0 0 0 0 0 0 0
## [,36] [,37] [,38] [,39] [,40] [,41] [,42] [,43]
## [1,] 0 0 0 0 0 0 0 0
## [2,] 0 0 0 0 0 0 0 0
## [3,] 0 0 0 0 0 0 0 0
## [4,] 0 0 0 0 0 0 0 0
## [5,] 0 0 0 0 0 0 0 0
## [6,] 0 0 0 0 0 0 0 0
head(covmat[[K+1]])
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
## [1,] 0.0075 0.0000 0.0000 0.0000 0.0000 0.0000 0 0 0 0 0
## [2,] 0.0000 0.0075 0.0000 0.0000 0.0000 0.0000 0 0 0 0 0
## [3,] 0.0000 0.0000 0.0075 0.0000 0.0000 0.0000 0 0 0 0 0
## [4,] 0.0000 0.0000 0.0000 0.0075 0.0000 0.0000 0 0 0 0 0
## [5,] 0.0000 0.0000 0.0000 0.0000 0.0075 0.0000 0 0 0 0 0
## [6,] 0.0000 0.0000 0.0000 0.0000 0.0000 0.0075 0 0 0 0 0
## [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22]
## [1,] 0 0 0 0 0 0 0 0 0 0 0
## [2,] 0 0 0 0 0 0 0 0 0 0 0
## [3,] 0 0 0 0 0 0 0 0 0 0 0
## [4,] 0 0 0 0 0 0 0 0 0 0 0
## [5,] 0 0 0 0 0 0 0 0 0 0 0
## [6,] 0 0 0 0 0 0 0 0 0 0 0
## [,23] [,24] [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33]
## [1,] 0 0 0 0 0 0 0 0 0 0 0
## [2,] 0 0 0 0 0 0 0 0 0 0 0
## [3,] 0 0 0 0 0 0 0 0 0 0 0
## [4,] 0 0 0 0 0 0 0 0 0 0 0
## [5,] 0 0 0 0 0 0 0 0 0 0 0
## [6,] 0 0 0 0 0 0 0 0 0 0 0
## [,34] [,35] [,36] [,37] [,38] [,39] [,40] [,41] [,42] [,43]
## [1,] 0 0 0 0 0 0 0 0 0 0
## [2,] 0 0 0 0 0 0 0 0 0 0
## [3,] 0 0 0 0 0 0 0 0 0 0
## [4,] 0 0 0 0 0 0 0 0 0 0
## [5,] 0 0 0 0 0 0 0 0 0 0
## [6,] 0 0 0 0 0 0 0 0 0 0
Now we compute the likelihood.
##' lik.func computes the marginal likelihood of one effect-size vector under
##' each prior covariance component.
##' @param b.mle Rx1 vector of MLEs for one gene-SNP pair
##' @param V.gp.hat RxR (diagonal) matrix of squared standard errors
##' @param covmat list of K*L RxR prior covariance matrices
##' @return numeric vector of length length(covmat); element x is the density of
##'   b.mle under N(0, covmat[[x]] + V.gp.hat) (via mvtnorm::dmvnorm)
lik.func=function(b.mle,V.gp.hat,covmat)
{ ## vapply over seq_along replaces the original seq(1:length(covmat)) (a
  ## redundant nested sequence) and pins the return type to one double per component
  vapply(seq_along(covmat),function(x){dmvnorm(x=b.mle, sigma=covmat[[x]] + V.gp.hat)},numeric(1))
}
Here, I return a matrix of likelihoods for each component of the training set, in order to compute the hierarchical weights. Recall:
Here the incomplete-data likelihood function is
\[L(\pi; \hat{b}, \mathbf{z}) = P(\hat{b}, \mathbf{z} \mid \pi) = \prod_{j=1}^J \sum_{k,l=1}^{K,L} \pi_{kl} \Pr(\hat{b}_{j} \mid z_{j}=[k,l])\]
Now, in order to estimate the hierarchical prior weights \(\pi_{k,l}\) we compute the \(K \times L\) dimensional likelihood at each gene-SNP pair \(j\) by evaluating the probability of observing \(\hat{b}_{j}\) given that we know the true \(b_{j}\) arises from component \(k,l\):
\[\mathcal{L}(\pi_{kl}; \hat{b}_{j}, U_{0kl}, \hat{V}_{j}) = \Pr(\hat{b}_{j} \mid z_{j}=[k,l]) = N_{R}(\hat{b}_{j}; \mathbf{0}, U_{0kl} + \hat{V}_{j})\]
Which means we form a \(J \times KL\) dimensional matrix entitled `global.lik` in my .Rmd file, where each row vector is the probability of the vector of observed MLEs given that the true \(b_{j}\) arose from element \(k,l\), as specified by its corresponding prior covariance matrix \(U_{0kl}\). You simply compute the probability from an \(R\)-dimensional multivariate normal with mean \(\mathbf{0}\) and variance \(U_{0kl} + \hat{V}_{j}\). I treat each of the \(J\) rows as an i.i.d. sample from which to maximize the likelihood using the mixEM algorithm.
##' return a matrix of likelihoods
##' @param b.gp.hat J*R matrix of MLEs for each of J SNPs
##' @param J=dim(b.gp.hat)
##' @param se.gp.hat J*R matrix of standard errors for each of J SNPs
library("mvtnorm")
J=dim(b.gp.hat)[1]
## J x (K*L) matrix: row j holds the likelihood of pair j under each component.
## The squared-SE diagonal matrix is rebuilt per pair from the j-th SE row.
lik.mat.real.nosfa=t(sapply(seq(1:J),function(x){lik.func(b.mle=b.gp.hat[x,],V.gp.hat=diag(se.gp.hat[x,])^2,covmat)}))
##' @details write to a table for speed (cache the expensive likelihood pass)
A="real_noSFA"
saveRDS(lik.mat.real.nosfa,paste0("likelihood",A,".rds"))
A="real_noSFA"
lik.mat.real.nosfa=readRDS(paste0("likelihood",A,".rds"))
library("SQUAREM")
source("mixEm.R")
## NOTE(review): the introduction says train on 10000 / test on 40000, but the
## split here is rows 1:1000 vs 1001:5000 — confirm which is intended.
train=lik.mat.real.nosfa[1:1000,]
test=lik.mat.real.nosfa[1001:5000,]
K=3
L=5
##' @describeIn compute HM weights on training and test set for comparison
pis=mixEM(matrix_lik=train,prior=rep(1,K*L))
pis2=mixEM(matrix_lik=test,prior=rep(1,K*L))
Now, we can compare the likelihood of the whole data set (the test data set) using the HM weights from the training set.
##' Total log-likelihood of the held-out set under the training-set weights:
##' for each row j, the dot product sum_k P(D_j | Z=k) * pi_k, then log and sum.
log.lik.nosfa=log(test%*%pis$pihat)
hist(log.lik.nosfa,freq=FALSE)
(sum(log.lik.nosfa))
## [1] 189773.4
A="real.noSFA"
## Component labels "k=..;l=.." laid out as a KxL character matrix
## (column l holds the labels for stretch l).
names.vec=outer(1:K,1:L,function(k,l)paste0("k=",k,";l=",l))
# write.table(cbind(as.vector(names.vec),as.vector(pis$pihat)),paste0("train",A,"pihat.realdata.txt"))
# write.table(cbind(as.vector(names.vec),as.vector(pis2$pihat)),paste0("test",A,"pihat.realdata.txt"))
We can make some plots to compare the weights between the test and train set.
I compare the relative order of importance of the components.
## Compare training vs test component weights.
## NOTE(review): train.weights and test.weights are not created in this file —
## presumably read back from the "train*pihat"/"test*pihat" tables written
## above; confirm before running.
a=order(test.weights[,2],decreasing=TRUE)
b=order(train.weights[,2],decreasing=TRUE)
plot(order(test.weights[,2],decreasing=TRUE),order(train.weights[,2],decreasing=TRUE))
sort(test.weights[,2],decreasing=TRUE)
sort(train.weights[,2],decreasing=TRUE)
plot(sort(test.weights[,2],decreasing=TRUE),sort(train.weights[,2],decreasing=TRUE))
par(mfrow=c(2,1))
## NOTE(review): these assignments reassign `train`/`test` (previously the
## likelihood subsets) to barplot midpoints — intentional? confirm.
train=barplot(b,xlab="OrderofComponentImportance",main="TrainingSet")
text(train,b,b,pos=3,offset=0.5)
test=barplot(a,xlab="OrderofComponentImportance",main="TestSet")
text(test,a,a,pos=3,offset=0.5)
## Mean squared difference in rank order, and in the weights themselves.
sum((a-b)^2/length(a))
sum(pis$pihat-pis2$pihat)^2/length(pis$pihat)
## NOTE(review): the parentheses below square/divide the return value of plot()
## (NULL), not the plotted quantity — likely intended
## plot((log(train.weights[,2]-test.weights[,2]))^2/length(test.weights[,2])); confirm.
plot(log(train.weights[,2]-test.weights[,2]))^2/length(test.weights[,2])
par(mfrow=c(2,1))
barplot(train.weights[,2],names=seq(1:(K*L)),las=2,main="TrainWeights")
barplot(test.weights[,2],names=seq(1:(K*L)),las=2,main="TestWeights")
##' Per-stretch view: one barplot of the K component weights for each l
par(mfrow=c(3,2))
for(l in 1:L){
i=l-1
start=(i*K+1)
end=(l*K)
barplot(pis$pihat[start:end],main="mixEM estimated pi",names=names.vec[start:end],las=2,col=c(1:14))
}
We can see that the majority of the weight is put on the covariance matrix of t statistics, \(X_{t}'X\) and the estimated covariance matrix, \(V_{t,1..K}\lambda^{2} V_{t,1..K}'\) where here I use K = 13. Recall each V is a \(43\) x \(1\) vector.
I compare with a naive iteration, which simply weights the relative likelihood across individuals.
##' @param global.lik JxKL matrix of likelihoods for each component for each of the J gene-SNP pairs
##' @return global.sum sums the likelihood for each component across the J gene-SNP pairs
##' @return global.norm sums the likelihood for all components across all J pairs
## Alias the likelihood matrix computed above.
global.lik=lik.mat.real.nosfa
##' Normalize the per-component likelihood mass into a single weight vector.
##' @param b.gp.hat,se.gp.hat unused here; kept for interface compatibility
##' @param global.lik JxKL matrix of component likelihoods
##' @return KLx1 matrix of component weights summing to 1
updated.weights=function(b.gp.hat,se.gp.hat,global.lik){
  component.mass=as.matrix(colSums(global.lik))# total likelihood mass per component
  component.mass/sum(component.mass)# normalize so the weights sum to one
}
## One "naive" update: normalized likelihood mass per component (uniform prior).
x=updated.weights(b.gp.hat,se.gp.hat,global.lik)
barplot(as.vector(x),names=as.vector(names.vec),main="Normalized Likelihood at Each Component")
Here, I’ve plotted the relative importance of each component after one iteration with a uniform prior weight on each \(\pi\).
Recall:
\[L(\beta_{j};k,l) = Pr(\hat{b}_{j} | k,l)\] \[=Pr(\hat{b}_{j}; 0, \omega^{2}_{l} U_{k} + \hat {V})\]
\[w_{jkl} = \frac{\pi_{k,l} L(\beta_{j};k,l)}{\sum_{k,l} \pi_{k,l} L(\beta_{j};k,l)}\]
We can then update our estimate of \(\pi_{k,l}\)
\[\pi_{kl}^{i+1} = \frac{\sum_{j} w_{jkl}}{\sum_{j,k,l} w_{jkl}}\]
Now that we have the hierarchical weights \(\pi_{k,l}\), we can compute the posterior distribution for each component.
For a given prior covariance matrix, compute posterior covariance and posterior mean. Here, let U.0k.l represent a specific matrix in U.0kl (.e.g, \(U.0kl[[l]][[k]]\))
##' Posterior covariance for one component: U1 = U0 (Vinv U0 + I)^{-1}.
##' @param V.gp.hat.inv RxR inverse of the (diagonal) error covariance matrix
##' @param U.0k.l a specific prior covariance matrix from U.0kl (e.g. U.0kl[[l]][[k]])
##' @return RxR posterior covariance matrix for the j-th gene-SNP pair
post.b.gpkl.cov <- function(V.gp.hat.inv, U.0k.l){
  precision.term <- V.gp.hat.inv %*% U.0k.l + diag(nrow(U.0k.l))
  U.0k.l %*% solve(precision.term)
}
##' Posterior mean for one component: mu = U1 %*% Vinv %*% b.
##' @param b.mle Rx1 vector of MLEs for one gene-SNP pair
##' @param V.gp.hat.inv RxR inverse error covariance matrix
##' @param U.gp1kl RxR posterior covariance from post.b.gpkl.cov
##' @return Rx1 matrix of posterior means
post.b.gpkl.mean <- function(b.mle, V.gp.hat.inv, U.gp1kl){
  shrinkage <- U.gp1kl %*% V.gp.hat.inv
  shrinkage %*% b.mle
}
We also need to compute the "posterior weights" corresponding to each prior covariance matrix, which is simply the likelihood evaluated at that component times the prior weight \(\pi_{k,l}\), normalized by the marginal likelihood over all components.
\(p(k=1,l=1|D)=\) \(\frac{p(D|k=1,l=1)*p(k=1,l=1)}{p(D)}\)
##' Convert the matrix of per-component likelihoods into posterior component
##' weights: w[j,k] = pihat[k] * lik[j,k] / sum_k' pihat[k'] * lik[j,k'].
##' @param pis EM output list; pis$pihat holds the prior weights P(Z=k)
##' @param lik.mat JxK matrix of likelihoods P(D_j | Z=k)
##' @return JxK matrix of posterior weights; each row sums to 1
post.weight.func=function(pis,lik.mat){
  weighted <- sweep(lik.mat, 2, pis$pihat, `*`)# scale column k by pihat[k]
  weighted / rowSums(weighted)# divide each row by its marginal likelihood
}
## Posterior weights for every pair under the training-set prior weights.
post.weights=as.matrix(post.weight.func(pis,lik.mat=lik.mat.real.nosfa))
##' @param U.0kl = L-dimensional list of K-dimensional lists of prior covariance matrices
##' @param b.mle = Rx1 vector of beta.hats
##' @param V.gp.hat = Rx1 vector of standard errors
##' @param covmat = LxK dimensional (unlisted) list of prior covariance matrices
##' @return post.means JxKxR array of posterior means, corresponding to the posterior mean ##' for the Jth individual in the Kth component across all R tissues
##' @return post.covs JxKxR array of posterior vars, corresponding to the posterior vars ##' for the Jth individual in the Kth component across all R tissues
##' @return post.ups JxKxR array of posterior tailprobs, corresponding to the marginal
##' upper tail probability for the Jth individual in the Kth component across all R
##' @return post.ups JxKxR array of posterior tailprobs, corresponding to the marginal
##' upper tail probability for the Jth individual in the Kth component across all R
##' @return post.nulls JxKxR array of posterior nullprobs, corresponding to the marginal
##' "null probability"" for the Jth individual in the Kth component across all R
## Preallocate the J x (K*L) x R result arrays.
## NOTE(review): 15 and 43 are hard-coded values of K*L and R — confirm they
## match length(covmat) and ncol(b.gp.hat).
J=nrow(b.gp.hat)
post.means=array(NA,dim=c(J,15,43))
post.covs=array(NA,dim=c(J,15,43))
post.ups=array(NA,dim=c(J,15,43))
post.nulls=array(NA,dim=c(J,15,43))
post.downs=array(NA,dim=c(J,15,43))
covmat=unlist(U.0kl,recursive=F)
## K is redefined here to mean the TOTAL number of components (K*L = 15).
K=length(covmat)
## Main posterior pass: for every gene-SNP pair j and every component k,
## compute the component-specific posterior covariance and mean, then the
## per-tissue marginal up/down/null probabilities.
for(j in 1:J){
for(k in 1:K){
b.mle=as.vector(t(b.gp.hat[j,]))##turn row j into an R x 1 vector
## NOTE(review): se.gp.hat[j,] is a 1-row data frame — confirm diag() coerces
## it into the intended RxR diagonal matrix of squared standard errors.
V.gp.hat=diag(se.gp.hat[j,])^2
V.gp.hat.inv <- solve(V.gp.hat)
U.gp1kl <- (post.b.gpkl.cov(V.gp.hat.inv, covmat[[k]]))
mu.gp1kl <- as.array(post.b.gpkl.mean(b.mle, V.gp.hat.inv, U.gp1kl))
post.means[j,k,]=mu.gp1kl
## Only the diagonal (posterior variances) is stored, not the full covariance.
post.covs[j,k,]=as.array(diag(U.gp1kl))
for(r in 1:R){##` marginal calculation for each tissue in each component
if(post.covs[j,k,r]==0){###zero posterior variance => the component puts all mass on the null
post.ups[j,k,r]=0#need to figure out how to make the list have individual entries
post.downs[j,k,r]=0
post.nulls[j,k,r]=1}
else{
post.ups[j,k,r]=pnorm(0,mean=mu.gp1kl[r],sd=sqrt(diag(U.gp1kl)[r]),lower.tail=F)
post.downs[j,k,r]=pnorm(0,mean=mu.gp1kl[r],sd=sqrt(diag(U.gp1kl)[r]),lower.tail=T)
post.nulls[j,k,r]=0}
}
}
}
## Cache the component-wise posterior arrays so the loop need not be rerun.
## NOTE(review): A is "real.noSFA" at this point (set for the weight plots),
## while the readRDS calls below use "real_noSFA" — confirm the intended tag.
saveRDS(post.means,file=paste0("post.means.",A,".rds"))
saveRDS(post.covs,file=paste0("post.covs.",A,".rds"))
saveRDS(post.ups,file=paste0("post.ups.",A,".rds"))
saveRDS(post.nulls,file=paste0("post.nulls.",A,".rds"))
saveRDS(post.downs,file=paste0("post.downs.",A,".rds"))
Now, we develop a function to compute the weighted posterior quantities, weighted by the posterior weights, which as described above are proportional to the marginal likelihood evaluated at \(\hat{\beta}_{j}\) for each component times the prior weight computed from the training set.
## Reload the cached posterior arrays.
## NOTE(review): the arrays were saved while A was "real.noSFA"; the
## dot/underscore mismatch means these calls may read different files than
## were just written — confirm the intended tag.
A="real_noSFA"
post.means=readRDS(file=paste0("post.means.",A,".rds"))
post.covs=readRDS(file=paste0("post.covs.",A,".rds"))
post.ups=readRDS(file=paste0("post.ups.",A,".rds"))
post.nulls=readRDS(file=paste0("post.nulls.",A,".rds"))
post.downs=readRDS(file=paste0("post.downs.",A,".rds"))
dim(post.means)
## [1] 6904 15 43
## Recover R, K (total components = K*L) and J from the array dimensions.
(R=dim(post.means)[3])
## [1] 43
(K=dim(post.means)[2])
## [1] 15
(J=dim(post.means)[1])
## [1] 6904
##the posterior mean across all K components in all tissues, for pair j = 5
post.means[5,,1:R]
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.00000000 0.000000000 0.00000000 0.00000000 0.000000000 0.00000000
## [2,] 0.00000000 0.000000000 0.00000000 0.00000000 0.000000000 0.00000000
## [3,] 0.00000000 0.000000000 0.00000000 0.00000000 0.000000000 0.00000000
## [4,] 0.02290003 0.003485651 0.05191108 0.01596734 -0.006547240 0.05159979
## [5,] 0.02607980 0.014765309 0.05589450 0.02331858 0.029328776 0.04652404
## [6,] 0.02432681 0.028804210 0.03466902 0.03321027 0.055089010 0.04130294
## [7,] 0.02481314 0.003891903 0.05953386 0.01774742 -0.007539247 0.05605573
## [8,] 0.02719634 0.013975427 0.06226199 0.02379869 0.027238123 0.04965086
## [9,] 0.02443498 0.029039443 0.03539975 0.03403490 0.056253071 0.04207218
## [10,] 0.02979069 0.005074848 0.08428808 0.02283993 -0.010817184 0.06775838
## [11,] 0.03031474 0.010481605 0.08380724 0.02483032 0.012792295 0.06093511
## [12,] 0.02462214 0.029496628 0.03697791 0.03585041 0.058775937 0.04372894
## [13,] 0.03055691 0.005275276 0.08890914 0.02368962 -0.011438862 0.06957398
## [14,] 0.03086338 0.009695885 0.08814941 0.02507711 0.008537727 0.06348226
## [15,] 0.02464224 0.029551994 0.03718717 0.03609486 0.059111443 0.04394823
## [,7] [,8] [,9] [,10] [,11]
## [1,] 0.00000000 0.00000000 0.00000000 0.000000000 0.0000000000
## [2,] 0.00000000 0.00000000 0.00000000 0.000000000 0.0000000000
## [3,] 0.00000000 0.00000000 0.00000000 0.000000000 0.0000000000
## [4,] -0.04281887 -0.07559667 -0.05984827 0.037052970 0.0103501110
## [5,] -0.05077169 -0.09527035 -0.05779978 0.006800524 -0.0179421465
## [6,] -0.02609646 -0.03573180 -0.03354836 -0.007399074 -0.0263596706
## [7,] -0.05106912 -0.08813343 -0.07034797 0.043107383 0.0121014022
## [8,] -0.05872509 -0.10813007 -0.06569211 0.010652067 -0.0167436134
## [9,] -0.02718342 -0.03711657 -0.03461215 -0.007953520 -0.0272940680
## [10,] -0.08308734 -0.13187209 -0.10837389 0.064033331 0.0182913956
## [11,] -0.08743892 -0.14826232 -0.09790646 0.034402287 -0.0044866048
## [12,] -0.02961028 -0.04018555 -0.03693671 -0.009193357 -0.0293615388
## [13,] -0.09015258 -0.14059378 -0.11622707 0.068169858 0.0195408117
## [14,] -0.09357666 -0.15519970 -0.10594562 0.041659280 -0.0003255063
## [15,] -0.02994031 -0.04060048 -0.03724751 -0.009361999 -0.0296407393
## [,12] [,13] [,14] [,15] [,16]
## [1,] 0.00000000 0.00000000 0.00000000 0.00000000 0.000000000
## [2,] 0.00000000 0.00000000 0.00000000 0.00000000 0.000000000
## [3,] 0.00000000 0.00000000 0.00000000 0.00000000 0.000000000
## [4,] -0.08304014 -0.09751876 -0.08970230 0.04501713 0.023308142
## [5,] -0.07343960 -0.10572401 -0.08703168 0.01963656 0.004784911
## [6,] -0.02033742 -0.03545466 -0.03630261 -0.03756854 -0.038864072
## [7,] -0.09738214 -0.11537933 -0.10613128 0.05275203 0.027553699
## [8,] -0.08547498 -0.12194708 -0.10110495 0.02658929 0.008709573
## [9,] -0.02123888 -0.03683115 -0.03754913 -0.03898512 -0.040220636
## [10,] -0.14877122 -0.18207257 -0.16747874 0.08037089 0.043343746
## [11,] -0.13245105 -0.18215911 -0.15701006 0.05907319 0.028803362
## [12,] -0.02324788 -0.03988519 -0.04029314 -0.04213599 -0.043203698
## [13,] -0.15927767 -0.19625268 -0.18052223 0.08599910 0.046688162
## [14,] -0.14361049 -0.19543790 -0.17020115 0.06733998 0.034118698
## [15,] -0.02352064 -0.04029847 -0.04066219 -0.04256327 -0.043604538
## [,17] [,18] [,19] [,20] [,21]
## [1,] 0.000000000 0.00000000 0.000000000 0.000000000 0.00000000
## [2,] 0.000000000 0.00000000 0.000000000 0.000000000 0.00000000
## [3,] 0.000000000 0.00000000 0.000000000 0.000000000 0.00000000
## [4,] -0.016948296 -0.05701240 -0.012038159 0.013271581 0.01658248
## [5,] -0.013709368 -0.02502858 -0.005965046 0.009019846 0.02839672
## [6,] -0.007138673 0.05396803 0.039083734 -0.027485580 0.03075367
## [7,] -0.018937542 -0.06581059 -0.013113955 0.015237735 0.01862214
## [8,] -0.015697157 -0.03401061 -0.008100985 0.011392422 0.02904866
## [9,] -0.007377921 0.05510747 0.039746064 -0.028665694 0.03119534
## [10,] -0.024746637 -0.09519020 -0.015967914 0.021653596 0.02469786
## [11,] -0.022372600 -0.07200402 -0.014028627 0.019836747 0.03001578
## [12,] -0.007881408 0.05757240 0.041165356 -0.031301288 0.03212945
## [13,] -0.025733351 -0.10081607 -0.016414444 0.022856568 0.02574793
## [14,] -0.023696075 -0.08097456 -0.014941757 0.021431237 0.03005146
## [15,] -0.007946556 0.05789968 0.041352500 -0.031659772 0.03225133
## [,22] [,23] [,24] [,25] [,26]
## [1,] 0.00000000 0.000000000 0.00000000 0.00000000 0.0000000000
## [2,] 0.00000000 0.000000000 0.00000000 0.00000000 0.0000000000
## [3,] 0.00000000 0.000000000 0.00000000 0.00000000 0.0000000000
## [4,] 0.03283174 -0.002581466 0.01940084 0.03869301 -0.0021681957
## [5,] 0.02554881 -0.012318254 0.02866351 0.03735058 0.0002023052
## [6,] 0.01992030 -0.010439396 0.01938687 0.02125701 0.0077075334
## [7,] 0.03763171 -0.002832338 0.02142177 0.04365364 -0.0024165566
## [8,] 0.02881077 -0.012650597 0.02965102 0.04089628 -0.0005228015
## [9,] 0.02019500 -0.011060690 0.01952115 0.02135754 0.0077790218
## [10,] 0.05318203 -0.003515653 0.02705910 0.05870658 -0.0031346995
## [11,] 0.04378817 -0.010572645 0.03138492 0.05368071 -0.0025282301
## [12,] 0.02078817 -0.012447018 0.01979401 0.02153102 0.0079504569
## [13,] 0.05607866 -0.003624971 0.02797948 0.06135102 -0.0032556366
## [14,] 0.04766980 -0.009599072 0.03145835 0.05654500 -0.0028219486
## [15,] 0.02086698 -0.012635332 0.01982860 0.02154956 0.0079752165
## [,27] [,28] [,29] [,30] [,31] [,32]
## [1,] 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## [2,] 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## [3,] 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## [4,] 0.04012491 0.05300968 0.02845660 0.01503398 0.02723631 0.01494740
## [5,] 0.07239408 0.05367702 0.03834342 0.03448685 0.02716419 0.03798794
## [6,] 0.05698100 0.02452449 0.03085201 0.04827166 0.02200573 0.05656399
## [7,] 0.04684600 0.05765963 0.03050974 0.01643771 0.03211738 0.01693245
## [8,] 0.07810829 0.05763130 0.03945905 0.03457109 0.03090185 0.03809014
## [9,] 0.05825015 0.02478197 0.03123258 0.04925139 0.02232239 0.05785351
## [10,] 0.07044599 0.06992760 0.03565472 0.02021216 0.05006007 0.02305632
## [11,] 0.09218179 0.06868807 0.04080175 0.03137873 0.04584040 0.03437468
## [12,] 0.06099714 0.02531505 0.03202601 0.05135670 0.02296297 0.06065475
## [13,] 0.07518026 0.07183818 0.03642248 0.02080944 0.05381833 0.02414800
## [14,] 0.09382894 0.07060042 0.04069005 0.03015200 0.04945232 0.03315816
## [15,] 0.06136196 0.02538348 0.03212834 0.05163470 0.02304322 0.06102788
## [,33] [,34] [,35] [,36] [,37]
## [1,] 0.00000000 0.00000000 0.0000000000 0.00000000 0.000000000
## [2,] 0.00000000 0.00000000 0.0000000000 0.00000000 0.000000000
## [3,] 0.00000000 0.00000000 0.0000000000 0.00000000 0.000000000
## [4,] -0.03022868 0.03969888 -0.0169712524 0.03911683 0.009147983
## [5,] -0.03554218 0.03334507 0.0024994007 0.03386403 0.001953412
## [6,] -0.02560596 0.02282630 0.0179229952 0.02052409 0.016345321
## [7,] -0.03558838 0.04673770 -0.0188759940 0.04234429 0.010861089
## [8,] -0.03958393 0.03927067 0.0001225528 0.03647974 0.001989186
## [9,] -0.02651204 0.02320815 0.0179783627 0.02071131 0.016384185
## [10,] -0.05514249 0.07241783 -0.0243393767 0.05071273 0.017364734
## [11,] -0.05416512 0.06440724 -0.0110340000 0.04554686 0.007228926
## [12,] -0.02849125 0.02404763 0.0180630255 0.02110500 0.016422694
## [13,] -0.05920905 0.07775839 -0.0252530166 0.05199725 0.018769647
## [14,] -0.05761369 0.07050458 -0.0138895771 0.04747008 0.009528017
## [15,] -0.02875572 0.02416075 0.0180707394 0.02115632 0.016423061
## [,38] [,39] [,40] [,41] [,42]
## [1,] 0.0000000000 0.000000000 0.00000000 0.00000000 0.000000000
## [2,] 0.0000000000 0.000000000 0.00000000 0.00000000 0.000000000
## [3,] 0.0000000000 0.000000000 0.00000000 0.00000000 0.000000000
## [4,] -0.0006094897 0.036380482 0.04184536 0.01694316 0.017507356
## [5,] 0.0014322741 0.027962253 0.02563402 0.03474680 0.036355729
## [6,] -0.0096126599 0.007140401 -0.01235141 0.03057203 0.007852876
## [7,] -0.0007164177 0.040838866 0.04725172 0.01842940 0.020900252
## [8,] 0.0008094022 0.031922270 0.03074195 0.03539981 0.040132129
## [9,] -0.0102096380 0.007016357 -0.01315938 0.03105955 0.007782210
## [10,] -0.0011036707 0.054098210 0.06371567 0.02235054 0.034128249
## [11,] -0.0011794491 0.047038595 0.05106469 0.03371420 0.050551747
## [12,] -0.0115737551 0.006731825 -0.01496234 0.03211419 0.007568769
## [13,] -0.0011836468 0.056386623 0.06661710 0.02296121 0.037060226
## [14,] -0.0014553025 0.050313704 0.05568550 0.03262905 0.051837384
## [15,] -0.0117625509 0.006692566 -0.01520722 0.03225443 0.007533915
## [,43]
## [1,] 0.000000000
## [2,] 0.000000000
## [3,] 0.000000000
## [4,] -0.004685240
## [5,] -0.001479814
## [6,] -0.006344371
## [7,] -0.005548084
## [8,] -0.001926199
## [9,] -0.006897421
## [10,] -0.008783120
## [11,] -0.004206853
## [12,] -0.008153932
## [13,] -0.009473617
## [14,] -0.005058369
## [15,] -0.008326999
##' error analysis to make sure the recording in the array is proper:
##' recompute the posterior quantities for (j=5, k=5) directly and plot them
##' against the stored array values — points should fall on the line y = x.
j=5
k=5
b.mle=as.vector(t(b.gp.hat[j,]))##turn row j into an R x 1 vector
V.gp.hat=diag(se.gp.hat[j,])^2
V.gp.hat.inv <- solve(V.gp.hat)
U.gp1kl <- (post.b.gpkl.cov(V.gp.hat.inv, covmat[[k]]))
mu.gp1kl <- as.array(post.b.gpkl.mean(b.mle, V.gp.hat.inv, U.gp1kl))
plot(post.means[j,k,],mu.gp1kl)
plot(diag(U.gp1kl),post.covs[j,k,])
## Clean up the loop indices so they cannot leak into later code.
rm(j)
rm(k)
##' @description total.mean function applied to each j: posterior weight [k] times post.means [k,1:R]
##' @param post.weights JxK matrix of posterior weights
##' @param post.*** JxKxR array of posterior *** across all individuals, all components, all tissues
##' @return 1xR vector of weighted posterior means for an individual
##check to make sure working properly: single-component contribution for j=5, k=5
post.means[5,5,1:R]*post.weights[5,5]
## [1] 8.530961e-06 4.829878e-06 1.828364e-05 7.627739e-06 9.593732e-06
## [6] 1.521847e-05 -1.660792e-05 -3.116387e-05 -1.890688e-05 2.224519e-06
## [11] -5.869053e-06 -2.402282e-05 -3.458337e-05 -2.846892e-05 6.423314e-06
## [16] 1.565192e-06 -4.484470e-06 -8.187097e-06 -1.951225e-06 2.950480e-06
## [21] 9.288849e-06 8.357269e-06 -4.029422e-06 9.376116e-06 1.221774e-05
## [26] 6.617601e-08 2.368082e-05 1.755828e-05 1.254251e-05 1.128099e-05
## [31] 8.885674e-06 1.242623e-05 -1.162620e-05 1.090750e-05 8.175786e-07
## [36] 1.107726e-05 6.389805e-07 4.685110e-07 9.146729e-06 8.385141e-06
## [41] 1.136602e-05 1.189232e-05 -4.840619e-07
## Compare the k=5 row of the weighted KxR matrix against the direct product above.
weightedmeans=(as.matrix(post.means[5,,1:R]*post.weights[5,]))[5,]
plot(post.means[5,5,1:R]*post.weights[5,5],weightedmeans)
Now that we’ve assured ourselves the weighting and array order is accurate, we can compute the total weighted mean and relevant posterior quantities as the sum of the component-specific quantity times the posterior probability of that component, \(\sum_{k} \text{Posterior}_{k} \times P(Z=k|D)\).
##' Weighted posterior summaries for gene-SNP pair j: the KxR slice of each
##' posterior array is scaled row-wise by the K posterior weights, then summed
##' over components to yield a length-R vector. Relies on the globals
##' post.means/post.ups/post.downs/post.nulls, post.weights, and R.
total.mean=function(j){
  weighted <- as.matrix(post.means[j,,1:R]*post.weights[j,])
  colSums(weighted)
}
total.up=function(j){
  weighted <- as.matrix(post.ups[j,,1:R]*post.weights[j,])
  colSums(weighted)
}
total.down=function(j){
  weighted <- as.matrix(post.downs[j,,1:R]*post.weights[j,])
  colSums(weighted)
}
total.null=function(j){
  weighted <- as.matrix(post.nulls[j,,1:R]*post.weights[j,])
  colSums(weighted)
}
total.mean(j=5)
## [1] 0.0016502472 0.0019487074 0.0023579529 0.0022488520 0.0037273243
## [6] 0.0028025836 -0.0017777508 -0.0024425558 -0.0022829479 -0.0004971085
## [11] -0.0017847747 -0.0013965118 -0.0024272721 -0.0024783825 -0.0025289227
## [16] -0.0026212111 -0.0004862441 0.0036338928 0.0026356491 -0.0018519382
## [21] 0.0020847272 0.0013526966 -0.0007085413 0.0013177160 0.0014467657
## [26] 0.0005202159 0.0038690941 0.0016726154 0.0020946177 0.0032689368
## [31] 0.0014939615 0.0038296971 -0.0017396668 0.0015513607 0.0012103668
## [36] 0.0013961638 0.0011037174 -0.0006482503 0.0004910230 -0.0008251604
## [41] 0.0020745467 0.0005418506 -0.0004286396
Now that we have a function to compute the weighted posterior mean and relevant tail probabilities for each Gene-SNP pair, we can easily apply this to the whole data set.
##' @description Generate Jx43 matrices with the weighted posterior summaries for each tissue
##' @details apply total.mean (and the tail/null analogues) to all J individuals.
## seq_len(J) replaces the redundant seq(1:J) form throughout.
all.mus=t(sapply(seq_len(J),function(j){total.mean(j)}))
all.upper=t(sapply(seq_len(J),function(j){total.up(j)}))
all.lower=t(sapply(seq_len(J),function(j){total.down(j)}))
all.nuller=t(sapply(seq_len(J),function(j){total.null(j)}))
##' @details compare.func finds the maximum tail prob in each tissue and takes the
##' complement for the local false sign rate (lfsr)
compare.func=function(j){
## the inner lambda argument is named p (was j) to avoid shadowing the row index
as.matrix(apply(rbind(all.upper[j,],all.lower[j,]),2,function(p){1-max(p)}))}
lfsr.mat=t(sapply(seq_len(J),function(j){compare.func(j)}))
#A="realnoSFA"
#write.table(all.mus,paste0("post.mean.",A,".txt"))
# write.table(all.upper,paste0("post.up.",A,".txt"))
# write.table(all.lower,paste0("post.low.",A,".txt"))
# write.table(all.nuller,paste0("post.null.",A,".txt"))
# write.table(lfsr.mat,paste0("lfsr.",A,".txt"))
We can compute the overall posterior mean for each gene-SNP pair across tissues (i.e., the weighted \(R\)-dimensional posterior mean vector \(\mu_{j}\)). Here, I do it for 10 gene-SNP pairs.
Now we can make some plots in which we compare the posterior means among tissues and compare with the mles. We can also compare the lfsr with the lfdr in each tissue.
##' @param names = tissue names
##' @description posterior.means = JxR matrix of weighted posterior means
##' @description posterior.nulls = JxR matrix of weighted posterior null probabilities
##' @param names = tissue names (row 44 dropped; the file carries one extra entry)
##' @description posterior.means = JxR matrix of weighted posterior means
##' @description posterior.nulls = JxR matrix of weighted posterior null probabilities
names=read.table("tissue.names.txt")[-44,1]
#mse.nosfa=sum(post.means-b.gp.hat[1001:5000,])^2
A="realnoSFA"
posterior.means=as.matrix(read.table(paste0("post.mean.",A,".txt")))
lfsr.mat=as.matrix(read.table(paste0("lfsr.",A,".txt")))
posterior.nulls=as.matrix(read.table(paste0("post.null.",A,".txt")))
## Draw 10 gene-SNP pairs uniformly at random for plotting. TRUE and seq_len
## replace the unsafe T and seq(1:J) forms; the explicit uniform prob vector is
## kept so the sampling algorithm (hence any seeded RNG stream) is unchanged.
J=nrow(b.gp.hat)
(snps=sample(seq_len(J),10,replace=TRUE,prob=rep(1/J,J)))
## [1] 6487 2337 4171 4492 5845 3679 3295 5156 468 6232
J=nrow(posterior.means)
(snps=sample(seq_len(J),10,replace=TRUE,prob=rep(1/J,J)))
## [1] 2607 1457 97 4954 4332 1193 6013 2684 845 4193
## For each sampled gene-SNP pair: a barplot of |posterior mean| annotated with
## lfsr, a barplot of signed posterior means annotated with their values, and a
## scatter of |posterior mean| against |MLE|.
for(x in seq_along(snps)){
j=snps[x]
par(mfrow=c(1,1))
b=barplot(abs(posterior.means[j,]),main=paste0("PostTissueMeans,B_",j,"andlfsr"),col=c(1:R),las=2,names=names,ylim=c(0,max(abs(posterior.means[j,]))+0.05))
lfsr=lfsr.mat[j,]
text(b, abs(posterior.means[j,]), labels=round(lfsr,2), pos=3, offset=.5)
#legend("topright",legend=round(lfsr,2),col=c(1:R),pch=1,title="lfsr")
par(mfrow=c(1,1))
b=barplot((posterior.means[j,]),main=paste0("PostTissueMeans,B_",j),col=c(1:R),las=2,names=names,ylim=c((min(posterior.means[j,])-0.05),max(abs(posterior.means[j,]))+0.05))
means=posterior.means[j,]
text(b, (posterior.means[j,]), labels=round(means,2), pos=3, offset=.5)
#legend("topright",legend=round(lfsr,2),col=c(1:R),pch=1,title="lfsr")
b.mle=t(b.gp.hat[j,])
a.m=abs(b.mle)
a.p=abs(posterior.means[j,])
low=min(a.m-0.01,a.p-0.01)
high=max(a.m+0.01,a.p+0.01)
plot(abs(b.mle),abs(posterior.means[j,]),main="PostMeanvsMLE",xlim=c(low,high),ylim=c(low,high),col=c(1:R),pch=5)
## identity line for shrinkage comparison: the original bare abline() drew
## nothing (all of a, b, h, v default to NULL, a silent no-op)
abline(0,1)
}
## One lfsr-vs-null-probability scatter per tissue.
par(mfrow=c(2,3))
for( r in seq_len(R)){# seq_len replaces the redundant seq(1:R)
plot(posterior.nulls[,r],lfsr.mat[,r],main=paste0("lfsr",r,"vs lfdr", r))
}